********************************************************************************************************************************
***   Replication file for:                                                                                                  ***
***   Berbee, P., Braun, S. T. and Franke, R. (2024). Reversing Fortunes of German Regions, 1926-2019. JoEG.			     ***
***   							                                                                                             ***
***   SCRIPT: 	_x2c_preparation_census_1907.do																				 ***	
***   PURPOSE: 	Reads in and prepares data from the 1907 occupation census												 	 ***
********************************************************************************************************************************


* Preamble (not needed when the project is executed through run.do)
run "$reversing/scripts/programs/_config.do"

************
* Code begins
************

*** Import data
* Load the "Data" sheet of the 1907 census workbook; the first row supplies the variable names.
import excel using "$reversing/data/GewerbeBetriebszaehlung_1907_3digit.xlsx", sheet("Data") firstrow clear

* Give the region identifier its final name and make sure it is stored as a number.
rename T_ID labor_market_id
destring labor_market_id, replace

** Create employment outcomes from the occupation census (Berufszählung)
* NOTE(review): the _f / _m infixes presumably denote female / male main-occupation counts
* and the AF suffix presumably the total over sectors A-F -- confirm against the codebook.
gen total_empl = mainocc_fAF + mainocc_mAF            // Employment in 1907
gen total_pop  = mainocc_fAF + mainocc_mAF + familyAF // Population in 1907

** Industrial employment share
* Main-occupation counts with suffix B, relative to total employment.
gen empshare_ind_1907 = (mainocc_mB + mainocc_fB) / total_empl
label var empshare_ind_1907 "Employment share in industry (incl. mining and construction), 1907"

* Standardized version of the industrial employment share (egen's std() function).
egen empshare_ind_1907_std = std(empshare_ind_1907)
* Stata keeps at most 80 characters of a variable label. The previous text,
* "... 1907 (standardized)", was 81 characters long and lost its closing parenthesis,
* so the suffix is abbreviated here.
label var empshare_ind_1907_std "Employment share in industry (incl. mining and construction), 1907 (std.)"



** Create different indicators for firm size

/* Overview of sectors in which at least 50% of employment is in firms with 201 or more employees (column shar~201 in the listing below) */

/*
     +-----------------------------------------------------------------+
     | gewerb~d                     gewerbe_name   shar~201   total_~t |
     |-----------------------------------------------------------------|
340. |   IX c 1                    Seidenweberei   .5000595      67192 |
341. |  IV f 1        Glashütten, Glasveredelung   .5005081      88564 |
342. |   IX a 3              Flachsröstanstalten   .5017483       1144 |
343. |   V c 1                     Eisengießerei   .5027826     165314 |
344. |   IX c 6                  Baumwollweberei   .5092512     159061 |
     |-----------------------------------------------------------------|
345. |   VI a 7        Eiserne Baukonstruktionen   .5112199      30036 |
346. |   XI a 3      Gefärbtes, lackiertes Leder    .524761      10460 |
347. |   VI c 4          Verf. von Fahrradteilen    .524918       3351 |
348. |   VII a           Chemische Großindustrie   .5267517      45156 |
349. |    X a 6             Bunt- u. Luxuspapier   .5273196      15520 |
     |-----------------------------------------------------------------|
350. |   IV c 3                  Mörtelbereitung   .5296314       2143 |
351. |   VI i 1            Fakrikation v. Lampen   .5388443      22912 |
352. |   IX c 3               Teppichfabrikation    .543013       9718 |
353. | XVII c 3       Kupfer- und Stahldruckerei   .5436447       1306 |
354. |   IX b 3                     Wollspinerei    .552942      58498 |
     |-----------------------------------------------------------------|
355. |   IV e 7            Spielw. Aus Porzellan   .5584906       1060 |
356. |   VI a 6                Nähmaschinenteile   .5955975       1590 |
357. |   VI c 2       Verfertigung v. Fahrrädern   .5969833      16508 |
358. | XIII a 8                Kakao, Schokolade   .6073483      17909 |
359. |   VI k 2               Akkumulatoren usw.   .6075376       3529 |
     |-----------------------------------------------------------------|
360. |  VI a 10           Buchdruckereimaschinen   .6216179       7318 |
361. | III e 3           Gewinnung von Bernstein    .629562       1096 |
362. |  VII d 5             Kohlenteerabkömminge   .6303297       6825 |
363. |   IV e 6                        Porzellan    .645573      51785 |
364. |  VII d 2                       Bleistifte   .6623526       3477 |
     |-----------------------------------------------------------------|
365. |   XI f 2           Gummireifenfabrikation   .6678832        548 |
366. |     VI d                       Schiffsbau   .6726857      49842 |
367. |   VI c 3       Fabrik. v. Kraftfahrzeugen     .68692      14549 |
368. |   IX b 7                Baumwollspinnerei   .6882507      98746 |
369. | VII e 1        Sprengstoffe, Sprengkörper   .7003458      26891 |
     |-----------------------------------------------------------------|
370. |  IV c 4                  Zement und Tratz   .7086776      30135 |
371. |  III b 1       Silber, Kupfer-, Zinnhütt.   .7147303      35668 |
372. |   VI k 4            And. elektr. Apparate   .7179829      42001 |
373. |    V c 2           Schwarz- und Weißblech   .7278369      22075 |
374. |   VI e 3             Sonstige Schußwaffen   .7403908      10797 |
     |-----------------------------------------------------------------|
375. |  III a 2                Eisenerzbergwerke   .7424695      34792 |
376. |   VI k 7   Fbr. v. Erd- u. Seekabeln usw.   .7428824      10186 |
377. |   VI k 3         Elektr. Telegraphen usw.    .746871       7830 |
378. |  XXI b 1        See- und Küstenschiffahrt   .7480435      60697 |
379. |   XI f 1                       Gummiwaren   .7518232      28383 |
     |-----------------------------------------------------------------|
380. |  XXI a 2               Straßenbahnbetrieb   .7758752      48531 |
381. |   V c 14           Schreibfedern a. Stahl   .7768795       1237 |
382. | III c 1                     Salzbergwerke    .795997      19735 |
383. |   IX b 5          Flachs- u. Hanfhechelei   .8242225      18586 |
384. |   IX c 5                      Juteweberei   .8293813       9067 |
     |-----------------------------------------------------------------|
385. |  III d 4              Steinkohlenbriketts   .8300428       8408 |
386. |   VI a 5                     Nähmaschinen   .8303881      18448 |
387. |     XI d             Lineoleumfabrikation   .8569807       2643 |
388. |  III b 3         Herst. V. Eisen u. Stahl   .8614709     170614 |
389. |  III a 1        Erzbergwerke (ohne Eisen)   .8646426      43906 |
     |-----------------------------------------------------------------|
390. |   IV e 5     Steingutfabr. u. -veredelung   .8662499      19514 |
391. |   VI a 1                   Dampfmaschinen   .8683411      69513 |
392. |  VII d 4             Anilin, Anilinfarben   .8952706       9071 |
393. |   VI k 1       Stromerzeugungsmasch. usw.   .9087824      27703 |
394. |   IX b 6                    Jutespinnerei   .9286602      12868 |
     |-----------------------------------------------------------------|
395. |   VI e 2               Geschützgießereien   .9610648       7551 |
396. | III d 1              Steinkohlenbergwerke   .9857375     452866 |
     +-----------------------------------------------------------------+

*/

* Sector codes from the overview listing above (rows 340-396), in the same order.
* Each code is the suffix of a gewpers<code> variable in the imported data.
local big201 IXc1 IVf1 IXa3 Vc1 IXc6 VIa7 XIa3 VIc4 VIIa Xa6 ///
    IVc3 VIi1 IXc3 XVIIc3 IXb3 IVe7 VIa6 VIc2 XIIIa8 VIk2 ///
    VIa10 IIIe3 VIId5 IVe6 VIId2 XIf2 VId VIc3 IXb7 VIIe1 ///
    IVc4 IIIb1 VIk4 Vc2 VIe3 IIIa2 VIk7 VIk3 XXIb1 XIf1 ///
    XXIa2 Vc14 IIIc1 IXb5 IXc5 IIId4 VIa5 XId IIIb3 IIIa1 ///
    IVe5 VIa1 VIId4 VIk1 IXb6 VIe2 IIId1

* Build the numerator "gewpersIXc1 + gewpersIVf1 + ..." once, so that the employment-
* and population-based shares are guaranteed to use the same set of sectors.
local sum201
foreach code of local big201 {
    if "`sum201'" == "" {
        local sum201 gewpers`code'
    }
    else {
        local sum201 `sum201' + gewpers`code'
    }
}

* Labels are kept below Stata's 80-character limit for variable labels; the previous
* 96-character labels were cut off in the middle of the word "employees".
gen empshare_f201_ind_1907 = (`sum201') / total_empl
label var empshare_f201_ind_1907 "Emp. share in industries where >=50% work in firms with 201+ employees, 1907"

gen popshare_f201_ind_1907 = (`sum201') / total_pop
label var popshare_f201_ind_1907 "Pop. share in industries where >=50% work in firms with 201+ employees, 1907"



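/* Overview of sectors in which at least 50% of employment is in firms above the 500-employee threshold (listing column shar~501; the variable names use 501 while the labels say 500 -- TODO confirm the exact cutoff) */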

/*
     +-----------------------------------------------------------------+
     | gewerb~d                     gewerbe_name   shar~501   total_~t |
     |-----------------------------------------------------------------|
376. |     VI d                       Schiffsbau   .5024076      49842 |
377. |  XXI a 2               Straßenbahnbetrieb    .530053      48531 |
378. |   VI k 7   Fbr. v. Erd- u. Seekabeln usw.   .5312193      10186 |
379. |  VII d 5             Kohlenteerabkömminge   .5591209       6825 |
380. |   XI f 1                       Gummiwaren   .5781277      28383 |
     |-----------------------------------------------------------------|
381. |  VII d 2                       Bleistifte   .5812482       3477 |
382. |   IX b 6                    Jutespinnerei   .5977619      12868 |
383. |   IV e 5     Steingutfabr. u. -veredelung   .6014656      19514 |
384. |   VI k 4            And. elektr. Apparate   .6022714      42001 |
385. |  XXI b 1        See- und Küstenschiffahrt   .6126992      60697 |
     |-----------------------------------------------------------------|
386. |  III b 3         Herst. V. Eisen u. Stahl   .6390097     170614 |
387. |  III a 1        Erzbergwerke (ohne Eisen)   .6812736      43906 |
388. |   VI k 3         Elektr. Telegraphen usw.   .6841635       7830 |
389. |   VI e 3             Sonstige Schußwaffen   .6889877      10797 |
390. |   VI a 5                     Nähmaschinen   .7069601      18448 |
     |-----------------------------------------------------------------|
391. |   VI a 1                   Dampfmaschinen   .7163121      69513 |
392. |  VII d 4             Anilin, Anilinfarben   .7921949       9071 |
393. |  III d 4              Steinkohlenbriketts   .7944815       8408 |
394. |   VI k 1       Stromerzeugungsmasch. usw.   .8063025      27703 |
395. |   VI e 2               Geschützgießereien   .9170971       7551 |
     |-----------------------------------------------------------------|
396. | III d 1              Steinkohlenbergwerke   .9461938     452866 |
     +-----------------------------------------------------------------+
*/

* Sector codes from the overview listing above (rows 376-396), in the same order.
* Each code is the suffix of a gewpers<code> variable in the imported data.
local big501 VId XXIa2 VIk7 VIId5 XIf1 VIId2 IXb6 IVe5 VIk4 XXIb1 ///
    IIIb3 IIIa1 VIk3 VIe3 VIa5 VIa1 VIId4 IIId4 VIk1 VIe2 IIId1

* Build the numerator "gewpersVId + gewpersXXIa2 + ..." once, so that the employment-
* and population-based shares are guaranteed to use the same set of sectors.
local sum501
foreach code of local big501 {
    if "`sum501'" == "" {
        local sum501 gewpers`code'
    }
    else {
        local sum501 `sum501' + gewpers`code'
    }
}

* Labels are kept below Stata's 80-character limit for variable labels; the previous
* 96-character labels were cut off in the middle of the word "employees".
* NOTE(review): the variable names and the listing header use 501, whereas the original
* label text said "500 employees or more". The 500 is kept here -- confirm the cutoff.
gen empshare_f501_ind_1907 = (`sum501') / total_empl
label var empshare_f501_ind_1907 "Emp. share in industries where >=50% work in firms with 500+ employees, 1907"

gen popshare_f501_ind_1907 = (`sum501') / total_pop
label var popshare_f501_ind_1907 "Pop. share in industries where >=50% work in firms with 500+ employees, 1907"

/* Overview of sectors in which at least 50% of employment is in firms with at least 1000 employees */
/*
     +-------------------------------------------------------------+
     | gewerb~d                 gewerbe_name   sha~1000   total_~t |
     |-------------------------------------------------------------|
389. |  III d 4          Steinkohlenbriketts    .521765       8408 |
390. |  XXI b 1    See- und Küstenschiffahrt   .5328929      60697 |
391. |   VI e 3         Sonstige Schußwaffen   .5580254      10797 |
392. |   VI k 3     Elektr. Telegraphen usw.   .6150702       7830 |
393. |  VII d 4         Anilin, Anilinfarben   .7186639       9071 |
     |-------------------------------------------------------------|
394. | III d 1          Steinkohlenbergwerke   .8009411     452866 |
395. |   VI k 1   Stromerzeugungsmasch. usw.   .8063025      27703 |
396. |   VI e 2           Geschützgießereien   .8294266       7551 |
     +-------------------------------------------------------------+

*/

* Sector codes from the overview listing above (rows 389-396), in the same order.
* Each code is the suffix of a gewpers<code> variable in the imported data.
local big1000 IIId4 XXIb1 VIe3 VIk3 VIId4 IIId1 VIk1 VIe2

* Build the numerator "gewpersIIId4 + gewpersXXIb1 + ..." once, so that the employment-
* and population-based shares are guaranteed to use the same set of sectors.
local sum1000
foreach code of local big1000 {
    if "`sum1000'" == "" {
        local sum1000 gewpers`code'
    }
    else {
        local sum1000 `sum1000' + gewpers`code'
    }
}

* Labels are kept below Stata's 80-character limit for variable labels; the previous
* 97-character labels were cut off in the middle of the word "employees".
gen empshare_f1000_ind_1907 = (`sum1000') / total_empl
label var empshare_f1000_ind_1907 "Emp. share in industries where >=50% work in firms with 1000+ employees, 1907"

gen popshare_f1000_ind_1907 = (`sum1000') / total_pop
label var popshare_f1000_ind_1907 "Pop. share in industries where >=50% work in firms with 1000+ employees, 1907"

* Average firm size in industry, 1907: gewpersB divided by hbetrB
* NOTE(review): presumably persons employed per main establishment in sector group B -- confirm.

generate firmsize_industry_1907 = gewpersB / hbetrB
label variable firmsize_industry_1907 "Average firmsize in industry, 1907"

** Create HHI index for employment concentration in industry

* Global holding the codes of the 3-digit sectors in industry (IIIa1 through XVIIId).
* Each code is used below as the suffix of a gewpers<code> variable, so every code listed
* here must have a matching variable in the imported data. The codes are separated by tabs.
global sectors3d "				IIIa1	IIIa2	IIIb1	IIIb2	IIIb3	IIIc1	IIIc2	IIId1	IIId2	IIId3	IIId4	IIId5	IIIe1	IIIe2	IIIe3	IIIf	IVa1	IVa2	IVa3	IVa4	IVa5	IVa6	IVa7	IVa8	IVa9	IVa10	IVb1	IVb2	IVc1	IVc2	IVc3	IVc4	IVc5	IVc6	IVc7	IVd1	IVd2	IVd3	IVd4	IVe1	IVe2	IVe3	IVe4	IVe5	IVe6	IVe7	IVf1	IVf2	Va1		Va2		Va3		Va4		Vb1		Vb2		Vb3		Vb4		Vb5		Vb6		Vb7		Vb8		Vb9		Vb10	Vb11	Vb12	Vc1		Vc2		Vc3		Vc4		Vc5		Vc6		Vc7		Vc8		Vc9		Vc10	Vc11	Vc12	Vc13	Vc14	VIa1	VIa2	VIa3	VIa4	VIa5	VIa6	VIa7	VIa8	VIa9	VIa10	VIa11	VIa12	VIa13	VIa14	VIa15	VIa16	VIa17	VIa18	VIb		VIc1	VIc2	VIc3	VIc4	VId		VIe1	VIe2	VIe3	VIf		VIg1	VIg2	VIg3	VIg4	VIg5	VIh1	VIh2	VIh3	VIi1	VIi2	VIk1	VIk2	VIk3	VIk4	VIk5	VIk6	VIk7	VIIa	VIIb	VIIc	VIId1	VIId2	VIId3	VIId4	VIId5	VIIe1	VIIe2	VIIe3	VIIf	VIIIa1	VIIIa2	VIIIb	VIIIc1	VIIIc2	VIIId	VIIIe1	VIIIe2	VIIIe3	VIIIe4	IXa1	IXa2	IXa3	IXb1	IXb2	IXb3	IXb4	IXb5	IXb6	IXb7	IXb8	IXb9	IXc1	IXc2	IXc3	IXc4	IXc5	IXc6	IXc7	IXd1	IXd2	IXe		IXf1	IXf2	IXg1	IXg2	IXg3	IXg4	IXg5	IXg6	IXg7	IXh		IXi1	IXi2	IXk1	IXk2	Xa1		Xa2		Xa3		Xa4		Xa5		Xa6		Xa7		Xa8		Xb1		Xb2		XIa1	XIa2	XIa3	XIb		XIc		XId		XIe1	XIe2	XIe3	XIe4	XIf1	XIf2	XIf3	XIIa1	XIIa2	XIIb1	XIIb2	XIIb3	XIIb4	XIIb5	XIIb6	XIIb7	XIIb8	XIIb9	XIIb10	XIIc	XIId	XIIe	XIIf	XIIg1	XIIg2	XIIg3	XIIh1	XIIh2	XIIh3	XIIIa1	XIIIa2	XIIIa3	XIIIa4	XIIIa5	XIIIa6	XIIIa7	XIIIa8	XIIIa9	XIIIa10	XIIIa11	XIIIb1	XIIIb2	XIIIb3	XIIIb4	XIIIb5	XIIIc	XIIId	XIIIe1	XIIIe2	XIIIe3	XIIIe4	XIIIe5	XIIIe6	XIIIe7	XIIIe8	XIIIf	XIVa1	XIVa2	XIVa3	XIVa4	XIVa5	XIVa6	XIVa7	XIVa8	XIVa9	XIVa10	XIVa11	XIVb	XVa		XVb1	XVb2	XVc1	XVc2	XVc3	XVIa1	XVIa2	XVIa3	XVIb	XVIc	XVId	XVIe	XVIf	XVIg	XVIh	XVIi	XVIk	XVIl	XVIm	XVIn	XVIo	XVIp	XVIIa	XVIIb	XVIIc1	XVIIc2	XVIIc3	XVIIc4	XVIId	XVIIIa1	XVIIIa2	XVIIIb	XVIIIc	XVIIId	"

* HHI = sum over all sectors in $sectors3d of (gewpers<sector> / gewpersB)^2.
* The accumulator starts at zero and receives one squared share per sector; a missing
* value in any gewpers<sector> (or in gewpersB) makes the index missing for that region.
gen empshare_hhi_ind_1907 = 0

* Each summand is held in a temporary variable rather than in a permanent
* hhi_summands_<sector> variable, so the loop no longer adds one extra variable per
* sector to the dataset. The summand is still created with a plain -gen-, so its storage
* type, and therefore the resulting index, is the same as before.
tempvar summand
foreach sector in $sectors3d {
    gen `summand' = (gewpers`sector' / gewpersB)^2
    replace empshare_hhi_ind_1907 = empshare_hhi_ind_1907 + `summand'
    drop `summand'
}

label var empshare_hhi_ind_1907 "HHI index of industry concentration, 1907"

*** Save data
* Tag the population total with its census year before it goes into the output file.
rename total_pop total_pop_1907
label variable total_pop_1907 "Population in 1907"

* Keep the region identifier, every empshare*/popshare* measure, the average firm size
* and the population total; all other variables are dropped.
keep labor_market_id empshare* popshare* firmsize_industry_1907 total_pop_1907
sort labor_market_id

save "$reversing/processed/intermediate/occ_census1907.dta", replace

*** EOF
